1 Questions to consider

Thoughts from recent conversations (12/15/2021):

Game plan:

2 Prepare data

load('lvl0_snapshots/mea_acute_lvl0_2020-07-29.RData')

# filter out wllq == 0
dat <- mea_acute_lvl0[wllq == 1]
rm(mea_acute_lvl0)

# assign the cndx
dat[, conc := signif(conc, 3)]
dat[is.na(conc), .N, by = .(wllt, spid)] # this is all fine
dat[, cndx := frank(conc, ties.method = 'dense'), by = .(apid, spid, wllt, srcf, apid)] # dense will collapse the rep's

# Calculate the bvals
dat[wllt == 'n', .N, by = spid] # most DMSO, some water, no Media
dat[, bval.n2low := median(rval[wllt == 'n' | (wllt == 't' & cndx %in% c(1,2))], na.rm = T), by = .(acnm, apid)]
dat[, bval.2low := median(rval[(wllt == 't' & cndx %in% c(1,2))], na.rm = T), by = .(acnm, apid)]
dat[, bval.n := median(rval[wllt == 'n'], na.rm = T), by = .(acnm, apid)]

3 Compare the bvals

# Questions:
# - How does the bval of 2 low only compare to bval of n and 2 low?
#   Is either consistently lower or higher?
#   Is the tail of either thicker or thinner? (e.g. at least fewer really low balls with 2 low only?)

# Another question:
# should we just filter out edgewells?

# Start with the classic wmfr
acnmi <- 'CCTE_Shafer_MEA_acute_firing_rate_mean_weighted'
ggplot(dat[acnm == acnmi], aes(x = bval.n2low, y = bval.2low)) +
  geom_point()+
  geom_abline(slope = 1, intercept = 0)+
  ggtitle(paste0(acnmi,'\nBval of 2 Lowest conc\'s only vs\nBval of n wells + 2 lowest conc\'s'))

Observations:

Let’s check out another endpoint, one that has some low bval’s

dat[, .(median(bval.n2low)), by = .(acnm)][order(V1)]
acnmi <- 'CCTE_Shafer_MEA_acute_cross_correlation_area'
ggplot(dat[acnm == acnmi], aes(x = bval.n2low, y = bval.2low)) +
  geom_point()+
  geom_abline(slope = 1, intercept = 0)+
  ggtitle(paste0(acnmi,'\nBval of 2 Lowest conc\'s only vs\nBval of n wells + 2 lowest conc\'s'))

Oh wow, even here it’s not really any better!!

How to quickly visually verify that for all endpoints the bval of 2 low only isn’t really any better?

Not sure, but let’s do a quick t-test

# Get a table with 1 row per apid
bval.tb <- dat[, unique(.SD), .SDcols = c('acnm','apid','bval.n2low','bval.2low')]
t.test.tb <- data.table()
for (acnmi in unique(dat$acnm)) {
  res <- t.test(x = bval.tb[acnm == acnmi, bval.n2low],
                y = bval.tb[acnm == acnmi, bval.2low],
                alternative = 'two.sided',
                mu = 0, 
                paired = TRUE,
                var.equal = FALSE)
  add.tb <- data.table(acnm = acnmi,
                       statistic = res$statistic,
                       parameter = res$parameter,
                       p.value = res$p.value,
                       conf.int.lb = res$conf.int[1],
                       conf.int.ub = res$conf.int[2],
                       mean_of_the_differences = res$estimate[1])
  t.test.tb <- rbind(t.test.tb, add.tb)
}

t.test.tb
p <- ggplot(t.test.tb[!grepl('(LDH)|(AB)',acnm)], aes(text = acnm, x = mean_of_the_differences, y = p.value)) +
  geom_point()+
  scale_y_log10()+
  geom_hline(yintercept = 0.05)+
  ggtitle(paste0('Welch Two Sample Paired t-test of bval of n wells + 2 low concs vs bval of 2 low concs only\nfor the\n ',nrow(t.test.tb[!grepl('(LDH)|(AB)',acnm)]),' MEA Acute components'))
plotly::ggplotly(p, tooltip = 'all')

4 T-test by apid

# Do some ranking of the apids
use.bval.tb <- bval.tb[!grepl('(LDH)|(AB)',acnm)]
use.bval.tb[, apid_rank_in_acnm := frank(bval.n2low, ties.method = 'average'), by = .(acnm)]
use.bval.tb[, apid_med_rank := median(apid_rank_in_acnm), by = .(apid)]
use.bval.tb[, apid_min_rank := min(apid_rank_in_acnm), by = .(apid)]
use.bval.tb[, apid_med_bval.n2low := median(bval.n2low), by = .(apid)]
use.bval.tb[, apid_min_bval.n2low := min(bval.n2low), by = .(apid)]


apid.t.test.tb <- data.table()
for (apidi in unique(dat$apid)) {
  res <- t.test(x = use.bval.tb[apid == apidi, bval.n2low],
                y = use.bval.tb[apid == apidi, bval.2low],
                alternative = 'two.sided',
                mu = 0, 
                paired = TRUE,
                var.equal = FALSE)
  add.tb <- data.table(apid = apidi,
                       statistic = res$statistic,
                       parameter = res$parameter,
                       p.value = res$p.value,
                       conf.int.lb = res$conf.int[1],
                       conf.int.ub = res$conf.int[2],
                       mean_of_the_differences = res$estimate[1])
  apid.t.test.tb <- rbind(apid.t.test.tb, add.tb)
}
apid.t.test.tb <- merge(apid.t.test.tb, 
                        use.bval.tb[, unique(.SD), .SDcols = c('apid','apid_med_rank','apid_min_rank',
                                                               'apid_med_bval.n2low','apid_min_bval.n2low')],
                        by = 'apid', all.x = T)
apid.t.test.tb
p <- ggplot(apid.t.test.tb, aes(text = apid, x = apid_min_rank, y = p.value)) +
  geom_point(aes(color = mean_of_the_differences))+
  scale_y_log10()+
  geom_hline(yintercept = 0.05)+
  ggtitle(paste0('Welch Two Sample Paired t-test of\nbval of n wells + 2 low concs vs bval of 2 low concs only\nfor the ',nrow(t.test.tb[!grepl('(LDH)|(AB)',acnm)]),' MEA Acute components'))
plotly::ggplotly(p, tooltip = 'all')

What if I check it out by just the raw min bval, rather than the basing it on the rank, which doesn’t actually tell me if the apid is concerningly low?

p <- ggplot(apid.t.test.tb, aes(text = apid, x = apid_min_bval.n2low, y = p.value)) +
  geom_point(aes(color = mean_of_the_differences))+
  scale_y_log10()+
  geom_hline(yintercept = 0.05)+
  ggtitle(paste0('Welch Two Sample Paired t-test of\nbval of n wells + 2 low concs vs bval of 2 low concs only\nfor the ',nrow(t.test.tb[!grepl('(LDH)|(AB)',acnm)]),' MEA Acute components'))
plotly::ggplotly(p, tooltip = 'all')

Sad, the apid with the smallest min bvals are still not really helped by using 2low only (at least, on average).

Maybe doing the t-test and looking at this aggregate level is not really helpful?

5 Picking this up